{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 93,
   "id": "de6a61f0-9931-4da5-b032-6fa5e73d6907",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 步骤1：数据提取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "id": "831df832-d26f-45ca-8e06-945549b4ae50",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "55f71dc2-022a-4dd3-adc1-5a8d97889950",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>bmi</th>\n",
       "      <th>children</th>\n",
       "      <th>smoker</th>\n",
       "      <th>region</th>\n",
       "      <th>charges</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19</td>\n",
       "      <td>female</td>\n",
       "      <td>27.900</td>\n",
       "      <td>0</td>\n",
       "      <td>yes</td>\n",
       "      <td>southwest</td>\n",
       "      <td>16884.92400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>18</td>\n",
       "      <td>male</td>\n",
       "      <td>33.770</td>\n",
       "      <td>1</td>\n",
       "      <td>no</td>\n",
       "      <td>southeast</td>\n",
       "      <td>1725.55230</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>28</td>\n",
       "      <td>male</td>\n",
       "      <td>33.000</td>\n",
       "      <td>3</td>\n",
       "      <td>no</td>\n",
       "      <td>southeast</td>\n",
       "      <td>4449.46200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33</td>\n",
       "      <td>male</td>\n",
       "      <td>22.705</td>\n",
       "      <td>0</td>\n",
       "      <td>no</td>\n",
       "      <td>northwest</td>\n",
       "      <td>21984.47061</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>32</td>\n",
       "      <td>male</td>\n",
       "      <td>28.880</td>\n",
       "      <td>0</td>\n",
       "      <td>no</td>\n",
       "      <td>northwest</td>\n",
       "      <td>3866.85520</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>31</td>\n",
       "      <td>female</td>\n",
       "      <td>25.740</td>\n",
       "      <td>0</td>\n",
       "      <td>no</td>\n",
       "      <td>southeast</td>\n",
       "      <td>3756.62160</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   age     sex     bmi  children smoker     region      charges\n",
       "0   19  female  27.900         0    yes  southwest  16884.92400\n",
       "1   18    male  33.770         1     no  southeast   1725.55230\n",
       "2   28    male  33.000         3     no  southeast   4449.46200\n",
       "3   33    male  22.705         0     no  northwest  21984.47061\n",
       "4   32    male  28.880         0     no  northwest   3866.85520\n",
       "5   31  female  25.740         0     no  southeast   3756.62160"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data=pd.read_csv('./data/insurance.csv',sep=',') # 按Tab键补齐   读csv的路径，sep指明文件数据的分隔符  \n",
    "data.head(n=6) # 取出前6行看看"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "id": "05ac832e-8d7c-4863-b5f4-a650ec8707ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 单行来看，前6个数据就是特征x,而最后的花费charges就是y 是有监督学习\n",
    "# y是连续的浮点数，要做回归任务,而不是分类 \n",
    "# 目标为预测出y=f(x)中的f\n",
    "# 特征x需要对y有影响，比如姓名，用户ID和花费y没任何关系，不需要处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "93464366-26e4-4670-bc31-0ffc060d10f5",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "id": "3f0d97eb-31fe-4523-b96d-28d1aef12e15",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 步骤2：EDA数据探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "id": "de5b3737-bf9f-4b38-b2f5-9c9d7122b967",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "id": "bdd358f3-95c4-4f7d-a365-76cc6eebca1a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAIiFJREFUeJzt3W9wVOXd//HPSpIlxGRLCOyyJUKsGf8lUA02ktqCBkIZ/tShU1AoxZEHIH8kP2AQ5IFpp02QewraodKRMoJQTOcejaUDKmHUIBNQDDAmYCkOAYJkjWLcBIwbDNfvATdnugQiC8G9kn2/ZnamOeebzXWuqnnPyW7iMsYYAQAAWOSmaC8AAADgUgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOvERXsB1+L8+fM6deqUkpOT5XK5or0cAABwFYwxam5ult/v1003dXyPpEsGyqlTp5Senh7tZQAAgGtQV1enAQMGdDjTJQMlOTlZ0oULTElJifJqAADA1WhqalJ6errzfbwjXTJQLv5YJyUlhUABAKCLuZqXZ/AiWQAAYB0CBQAAWIdAAQAA1iFQAACAdQgUAABgHQIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWCcu2guw0aAlW6O9hIgdWz422ksAAKDTcAcFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAdQgUAABgHQIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUiCpSioiK5XK6wh8/nc84bY1RUVCS/36/ExESNGDFCBw8eDHuOUCikefPmKS0tTUlJSZowYYJOnjzZOVcDAAC6hYjvoNx9992qr693HtXV1c65FStWaOXKlVq9erX27t0rn8+nUaNGqbm52ZkpLCxUWVmZSktLtWvXLp05c0bjxo1TW1tb51wRAADo8uIi/oS4uLC7JhcZY/Tcc89p2bJlmjhxoiRpw4YN8nq92rx5s2bOnKlgMKh169Zp48aNGjlypCRp06ZNSk9P144dOzR69OjrvBwAANAdRHwH5ciRI/L7/crIyNAjjzyio0ePSpJqa2sVCARUUFDgzLrdbg0fPlyVlZWSpKqqKp07dy5sxu/3Kysry5m5nFAopKamprAHAADoviIKlNzcXL388st66623tHbtWgUCAeXl5en06dMKBAKSJK/XG/Y5Xq/XORcIBJSQkKDevXtfceZySkpK5PF4nEd6enokywYAAF1MRIEyZswY/epXv1J2drZGjhyprVu3Srrwo5yLXC5X2OcYY9odu9R3zSxdulTBYNB51NXVRbJsAADQxVzX24yTkpKUnZ2tI0eOOK9LufROSENDg3NXxefzqbW1VY2NjVecuRy3262UlJSwBwAA6L6uK1BCoZA+/vhj9e/fXxkZGfL5fCovL3fOt7a2qqKiQnl5eZKknJwcxcfHh83U19erpqbGmQEAAIjoXTyLFi3S+PHjdcstt6ihoUF/+MMf1NTUpOnTp8vlcqmwsFDFxcXKzMxUZmamiouL1atXL02ZMkWS5PF4NGPGDC1cuFB9+vRRamqqFi1a5PzICAAAQIowUE6ePKlHH31UX3zxhfr27av7779fe/bs0cCBAyVJixcvVktLi2bPnq3Gxkbl5uZq+/btSk5Odp5j1apViouL06RJk9TS0qL8/HytX79ePXr06NwrAwAAXZbLGGOivYhINTU1yePxKBgM3pDXowxasrXTn/NGO7Z8bLSXAABAhyL5/s3f4gEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAdQgUAABgHQIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAA
WIdAAQAA1iFQAACAdQgUAABgHQIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAdQgUAABgHQIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAdQgUAABgHQIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAda4rUEpKSuRyuVRYWOgcM8aoqKhIfr9fiYmJGjFihA4ePBj2eaFQSPPmzVNaWpqSkpI0YcIEnTx58nqWAgAAupFrDpS9e/fqxRdf1ODBg8OOr1ixQitXrtTq1au1d+9e+Xw+jRo1Ss3Nzc5MYWGhysrKVFpaql27dunMmTMaN26c2trarv1KAABAt3FNgXLmzBlNnTpVa9euVe/evZ3jxhg999xzWrZsmSZOnKisrCxt2LBBX3/9tTZv3ixJCgaDWrdunf70pz9p5MiRuueee7Rp0yZVV1drx44dnXNVAACgS7umQJkzZ47Gjh2rkSNHhh2vra1VIBBQQUGBc8ztdmv48OGqrKyUJFVVVencuXNhM36/X1lZWc4MAACIbXGRfkJpaan27dunvXv3tjsXCAQkSV6vN+y41+vV8ePHnZmEhISwOy8XZy5+/qVCoZBCoZDzcVNTU6TLBgAAXUhEd1Dq6uo0f/58bdq0ST179rzinMvlCvvYGNPu2KU6mikpKZHH43Ee6enpkSwbAAB0MRHdQamqqlJDQ4NycnKcY21tbdq5c6dWr16tw4cPS7pwl6R///7OTENDg3NXxefzqbW1VY2NjWF3URoaGpSXl3fZr7t06VItWLDA+bipqYlIucSgJVujvYSIHVs+NtpLAABYKqI7KPn5+aqurtaBAwecx9ChQzV16lQdOHBAt956q3w+n8rLy53PaW1tVUVFhRMfOTk5io+PD5upr69XTU3NFQPF7XYrJSUl7AEAALqviO6gJCcnKysrK+xYUlKS+vTp4xwvLCxUcXGxMjMzlZmZqeLiYvXq1UtTpkyRJHk8Hs2YMUMLFy5Unz59lJqaqkWLFik7O7vdi24BAEBsivhFst9l8eLFamlp0ezZs9XY2Kjc3Fxt375dycnJzsyqVasUFxenSZMmqaWlRfn5+Vq/fr169OjR2csBAABdkMsYY6K9iEg1NTXJ4/EoGAzekB/3dMXXc3RFvAYFAGJLJN+/+Vs8AADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKwTUaCsWbNGgwcPVkpKilJSUjRs2DC98cYbznljjIqKiuT3+5WYmKgRI0bo4MGDYc8RCoU0b948paWlKSkpSRMmTNDJkyc752oAAEC3EFGgDBgwQMuXL9eHH36oDz/8UA899JB++ctfOhGyYsUKrVy5UqtXr9bevXvl8/k0atQoNTc3O89RWFiosrIylZaWateuXTpz5ozGjRuntra2zr0yAADQZbmMMeZ6niA1NVX/8z//o8cff1x+v1+FhYV66qmnJF24W+L1evXss89q5syZCgaD
6tu3rzZu3KjJkydLkk6dOqX09HRt27ZNo0ePvqqv2dTUJI/Ho2AwqJSUlOtZ/mUNWrK1058T7R1bPjbaSwAAfI8i+f59za9BaWtrU2lpqc6ePathw4aptrZWgUBABQUFzozb7dbw4cNVWVkpSaqqqtK5c+fCZvx+v7KyspyZywmFQmpqagp7AACA7iviQKmurtbNN98st9utWbNmqaysTHfddZcCgYAkyev1hs17vV7nXCAQUEJCgnr37n3FmcspKSmRx+NxHunp6ZEuGwAAdCERB8rtt9+uAwcOaM+ePXriiSc0ffp0HTp0yDnvcrnC5o0x7Y5d6rtmli5dqmAw6Dzq6uoiXTYAAOhCIg6UhIQE3XbbbRo6dKhKSko0ZMgQPf/88/L5fJLU7k5IQ0ODc1fF5/OptbVVjY2NV5y5HLfb7bxz6OIDAAB0X9f9e1CMMQqFQsrIyJDP51N5eblzrrW1VRUVFcrLy5Mk5eTkKD4+Pmymvr5eNTU1zgwAAEBcJMNPP/20xowZo/T0dDU3N6u0tFTvvvuu3nzzTblcLhUWFqq4uFiZmZnKzMxUcXGxevXqpSlTpkiSPB6PZsyYoYULF6pPnz5KTU3VokWLlJ2drZEjR96QCwQAAF1PRIHy2Wefadq0aaqvr5fH49HgwYP15ptvatSoUZKkxYsXq6WlRbNnz1ZjY6Nyc3O1fft2JScnO8+xatUqxcXFadKkSWppaVF+fr7Wr1+vHj16dO6VAQCALuu6fw9KNPB7ULoHfg8KAMSW7+X3oAAAANwoBAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArBNRoJSUlOi+++5TcnKy+vXrp4cffliHDx8OmzHGqKioSH6/X4mJiRoxYoQOHjwYNhMKhTRv3jylpaUpKSlJEyZM0MmTJ6//agAAQLcQUaBUVFRozpw52rNnj8rLy/Xtt9+qoKBAZ8+edWZWrFihlStXavXq1dq7d698Pp9GjRql5uZmZ6awsFBlZWUqLS3Vrl27dObMGY0bN05tbW2dd2UAAKDLchljzLV+8ueff65+/fqpoqJCP//5z2WMkd/vV2FhoZ566ilJF+6WeL1ePfvss5o5c6aCwaD69u2rjRs3avLkyZKkU6dOKT09Xdu2bdPo0aO/8+s2NTXJ4/EoGAwqJSXlWpd/RYOWbO3050R7x5aPjfYSAADfo0i+f1/Xa1CCwaAkKTU1VZJUW1urQCCggoICZ8btdmv48OGqrKyUJFVVVencuXNhM36/X1lZWc7MpUKhkJqamsIeAACg+7rmQDHGaMGCBXrggQeUlZUlSQoEApIkr9cbNuv1ep1zgUBACQkJ6t279xVnLlVSUiKPx+M80tPTr3XZAACgC7jmQJk7d64++ugjvfLKK+3OuVyusI+NMe2OXaqjmaVLlyoYDDqPurq6a102AADoAq4pUObNm6ctW7bonXfe0YABA5zjPp9PktrdCWloaHDuqvh8PrW2tqqxsfGKM5dyu91KSUkJewAAgO4rokAxxmju3Ll67bXX9PbbbysjIyPsfEZGhnw+n8rLy51jra2tqqioUF5eniQpJydH8fHxYTP19fWqqalxZgAAQGyLi2R4zpw52rx5s/75
z38qOTnZuVPi8XiUmJgol8ulwsJCFRcXKzMzU5mZmSouLlavXr00ZcoUZ3bGjBlauHCh+vTpo9TUVC1atEjZ2dkaOXJk518hAADociIKlDVr1kiSRowYEXb8pZde0mOPPSZJWrx4sVpaWjR79mw1NjYqNzdX27dvV3JysjO/atUqxcXFadKkSWppaVF+fr7Wr1+vHj16XN/VAACAbuG6fg9KtPB7ULoHfg8KAMSW7+33oAAAANwIBAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOvERXsBiF2DlmyN9hIidmz52GgvAQBiAndQAACAdQgUAABgHQIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAdSIOlJ07d2r8+PHy+/1yuVx6/fXXw84bY1RUVCS/36/ExESNGDFCBw8eDJsJhUKaN2+e0tLSlJSUpAkTJujkyZPXdSEAAKD7iDhQzp49qyFDhmj16tWXPb9ixQqtXLlSq1ev1t69e+Xz+TRq1Cg1Nzc7M4WFhSorK1Npaal27dqlM2fOaNy4cWpra7v2KwEAAN1GXKSfMGbMGI0ZM+ay54wxeu6557Rs2TJNnDhRkrRhwwZ5vV5t3rxZM2fOVDAY1Lp167Rx40aNHDlSkrRp0yalp6drx44dGj169HVcDgAA6A469TUotbW1CgQCKigocI653W4NHz5clZWVkqSqqiqdO3cubMbv9ysrK8uZAQAAsS3iOygdCQQCkiSv1xt23Ov16vjx485MQkKCevfu3W7m4udfKhQKKRQKOR83NTV15rIBAIBlbsi7eFwuV9jHxph2xy7V0UxJSYk8Ho/zSE9P77S1AgAA+3RqoPh8PklqdyekoaHBuavi8/nU2tqqxsbGK85caunSpQoGg86jrq6uM5cNAAAs06mBkpGRIZ/Pp/LycudYa2urKioqlJeXJ0nKyclRfHx82Ex9fb1qamqcmUu53W6lpKSEPQAAQPcV8WtQzpw5o08++cT5uLa2VgcOHFBqaqpuueUWFRYWqri4WJmZmcrMzFRxcbF69eqlKVOmSJI8Ho9mzJihhQsXqk+fPkpNTdWiRYuUnZ3tvKsHAADEtogD5cMPP9SDDz7ofLxgwQJJ0vTp07V+/XotXrxYLS0tmj17thobG5Wbm6vt27crOTnZ+ZxVq1YpLi5OkyZNUktLi/Lz87V+/Xr16NGjEy4JAAB0dS5jjIn2IiLV1NQkj8ejYDB4Q37cM2jJ1k5/TnQPx5aPjfYSAKDLiuT7N3+LBwAAWIdAAQAA1iFQAACAdTr1N8kC3V1XfH0Sr5sB0BVxBwUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAdQgUAABgHQIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWCcu2gsAgEsNWrI12kuI2LHlY6O9BKBbIVAAoBN0xaiSCCvYix/xAAAA6xAoAADAOgQKAACwDoECAACsQ6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA6/DXjIFurqv+lV0AsY07KAAAwDrcQQGAGNYV77AdWz422kvA94A7KAAAwDoECgAAsA6BAgAArEOgAAAA6xAoAADAOgQKAACwDoECAACs
Q6AAAADrECgAAMA6BAoAALAOgQIAAKxDoAAAAOsQKAAAwDoECgAAsA6BAgAArEOgAAAA60Q1UF544QVlZGSoZ8+eysnJ0XvvvRfN5QAAAEtELVD+8Y9/qLCwUMuWLdP+/fv1s5/9TGPGjNGJEyeitSQAAGAJlzHGROML5+bm6t5779WaNWucY3feeacefvhhlZSUdPi5TU1N8ng8CgaDSklJ6fS1DVqytdOfEwDQOY4tHxvtJUSsK35fuRH7HMn377hO/+pXobW1VVVVVVqyZEnY8YKCAlVWVrabD4VCCoVCzsfBYFDShQu9Ec6Hvr4hzwsAuH63/L//jfYSYsKN+B578Tmv5t5IVALliy++UFtbm7xeb9hxr9erQCDQbr6kpES/+93v2h1PT0+/YWsEACCWeZ67cc/d3Nwsj8fT4UxUAuUil8sV9rExpt0xSVq6dKkWLFjgfHz+/Hl9+eWX6tOnz2XnO9LU1KT09HTV1dXdkB8PdXXsT8fYn46xPx1jfzrG/nSsO+yPMUbNzc3y+/3fORuVQElLS1OPHj3a3S1paGhod1dFktxut9xud9ixH/zgB9e1hpSUlC77f/D3gf3pGPvTMfanY+xPx9ifjnX1/fmuOycXReVdPAkJCcrJyVF5eXnY8fLycuXl5UVjSQAAwCJR+xHPggULNG3aNA0dOlTDhg3Tiy++qBMnTmjWrFnRWhIAALBE1AJl8uTJOn36tH7/+9+rvr5eWVlZ2rZtmwYOHHhDv67b7dYzzzzT7kdGuID96Rj70zH2p2PsT8fYn47F2v5E7fegAAAAXAl/iwcAAFiHQAEAANYhUAAAgHUIFAAAYJ2YCpQXXnhBGRkZ6tmzp3JycvTee+9Fe0nXbefOnRo/frz8fr9cLpdef/31sPPGGBUVFcnv9ysxMVEjRozQwYMHw2ZCoZDmzZuntLQ0JSUlacKECTp58mTYTGNjo6ZNmyaPxyOPx6Np06bpq6++Cps5ceKExo8fr6SkJKWlpenJJ59Ua2vrjbjsq1ZSUqL77rtPycnJ6tevnx5++GEdPnw4bCaW92jNmjUaPHiw84ufhg0bpjfeeMM5H8t7c6mSkhK5XC4VFhY6x2J9f4qKiuRyucIePp/POR/r+/Ppp5/qN7/5jfr06aNevXrpxz/+saqqqpzzsb4/38nEiNLSUhMfH2/Wrl1rDh06ZObPn2+SkpLM8ePHo72067Jt2zazbNky8+qrrxpJpqysLOz88uXLTXJysnn11VdNdXW1mTx5sunfv79pampyZmbNmmV++MMfmvLycrNv3z7z4IMPmiFDhphvv/3WmfnFL35hsrKyTGVlpamsrDRZWVlm3Lhxzvlvv/3WZGVlmQcffNDs27fPlJeXG7/fb+bOnXvD96Ajo0ePNi+99JKpqakxBw4cMGPHjjW33HKLOXPmjDMTy3u0ZcsWs3XrVnP48GFz+PBh8/TTT5v4+HhTU1NjjIntvflvH3zwgRk0aJAZPHiwmT9/vnM81vfnmWeeMXfffbepr693Hg0NDc75WN6fL7/80gwcONA89thj5v333ze1tbVmx44d5pNPPnFmYnl/rkbMBMpPfvITM2vWrLBjd9xxh1myZEmUVtT5Lg2U8+fPG5/PZ5YvX+4c++abb4zH4zF//etfjTHGfPXVVyY+Pt6UlpY6M59++qm56aabzJtvvmmMMebQoUNGktmzZ48zs3v3biPJ/Pvf/zbGXAilm266yXz66afOzCuvvGLcbrcJBoM35HqvRUNDg5FkKioqjDHs0eX07t3b/O1vf2Nv/k9zc7PJzMw05eXlZvjw4U6gsD8XAmXIkCGXPRfr+/PUU0+ZBx544IrnY31/rkZM/IintbVVVVVVKigoCDteUFCgysrKKK3qxqutrVUgEAi7brfbreHDhzvXXVVVpXPnzoXN+P1+ZWVlOTO7d++Wx+NRbm6uM3P//ffL4/GEzWRlZYX9AajRo0crFAqF3dKMtmAwKElKTU2V
xB79t7a2NpWWlurs2bMaNmwYe/N/5syZo7Fjx2rkyJFhx9mfC44cOSK/36+MjAw98sgjOnr0qCT2Z8uWLRo6dKh+/etfq1+/frrnnnu0du1a53ys78/ViIlA+eKLL9TW1tbuDxF6vd52f7CwO7l4bR1ddyAQUEJCgnr37t3hTL9+/do9f79+/cJmLv06vXv3VkJCgjV7bIzRggUL9MADDygrK0sSeyRJ1dXVuvnmm+V2uzVr1iyVlZXprrvuYm8klZaWat++fSopKWl3jv2RcnNz9fLLL+utt97S2rVrFQgElJeXp9OnT8f8/hw9elRr1qxRZmam3nrrLc2aNUtPPvmkXn75ZUn883M1ovar7qPB5XKFfWyMaXesO7qW67505nLz1zITTXPnztVHH32kXbt2tTsXy3t0++2368CBA/rqq6/06quvavr06aqoqHDOx+re1NXVaf78+dq+fbt69ux5xblY3R9JGjNmjPO/s7OzNWzYMP3oRz/Shg0bdP/990uK3f05f/68hg4dquLiYknSPffco4MHD2rNmjX67W9/68zF6v5cjZi4g5KWlqYePXq0K8WGhoZ2VdmdXHw1fUfX7fP51NraqsbGxg5nPvvss3bP//nnn4fNXPp1Ghsbde7cOSv2eN68edqyZYveeecdDRgwwDnOHl346+K33Xabhg4dqpKSEg0ZMkTPP/98zO9NVVWVGhoalJOTo7i4OMXFxamiokJ//vOfFRcX56wrVvfncpKSkpSdna0jR47E/D8//fv311133RV27M4779SJEyck8d+eqxETgZKQkKCcnByVl5eHHS8vL1deXl6UVnXjZWRkyOfzhV13a2urKioqnOvOyclRfHx82Ex9fb1qamqcmWHDhikYDOqDDz5wZt5//30Fg8GwmZqaGtXX1zsz27dvl9vtVk5Ozg29zo4YYzR37ly99tprevvtt5WRkRF2nj1qzxijUCgU83uTn5+v6upqHThwwHkMHTpUU6dO1YEDB3TrrbfG9P5cTigU0scff6z+/fvH/D8/P/3pT9v9SoP//Oc/zh/EjfX9uSrfz2txo+/i24zXrVtnDh06ZAoLC01SUpI5duxYtJd2XZqbm83+/fvN/v37jSSzcuVKs3//fuft08uXLzcej8e89tprprq62jz66KOXfRvbgAEDzI4dO8y+ffvMQw89dNm3sQ0ePNjs3r3b7N6922RnZ1/2bWz5+flm3759ZseOHWbAgAFRfxvbE088YTwej3n33XfD3gr59ddfOzOxvEdLly41O3fuNLW1teajjz4yTz/9tLnpppvM9u3bjTGxvTeX89/v4jGG/Vm4cKF59913zdGjR82ePXvMuHHjTHJysvPf1Vjenw8++MDExcWZP/7xj+bIkSPm73//u+nVq5fZtGmTMxPL+3M1YiZQjDHmL3/5ixk4cKBJSEgw9957r/NW067snXfeMZLaPaZPn26MufBWtmeeecb4fD7jdrvNz3/+c1NdXR32HC0tLWbu3LkmNTXVJCYmmnHjxpkTJ06EzZw+fdpMnTrVJCcnm+TkZDN16lTT2NgYNnP8+HEzduxYk5iYaFJTU83cuXPNN998cyMv/ztdbm8kmZdeesmZieU9evzxx51/J/r27Wvy8/OdODEmtvfmci4NlFjfn4u/tyM+Pt74/X4zceJEc/DgQed8rO/Pv/71L5OVlWXcbre54447zIsvvhh2Ptb357u4jDEmOvduAAAALi8mXoMCAAC6FgIFAABYh0ABAADWIVAAAIB1CBQAAGAdAgUAAFiHQAEAANYhUAAAgHUIFAAAYB0CBQAAWIdAAQAA1iFQAACAdf4/RbdQH0GdNcMAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "调整后数据\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAJu5JREFUeJzt3X9w1PWdx/HXmh9L4JKVhLLL1kCCk4IYDhEtGDmBAUIpP2yZHiItl1Z6QwdKjSCQDPUM3jUBroe5mhEHhyMUJsXpQThPrBA6EsqltRCCFfREa4RQ2cm0l24SwASSz/3hsb0l4cfG3exnw/Mx853x+/l+vt+8P/Nx3Jef/X736zDGGAEAAFjkjmgXAAAAcC0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOvHRLqAnOjs79cknnyg5OVkOhyPa5QAAgFtgjFFLS4u8Xq/uuOPGayQxGVA++eQTpaenR7sMAADQAw0NDbrrrrtu2CcmA0pycrKkzwaYkpIS5WoAAMCtaG5uVnp6euBz/EZiMqBc/VonJSWFgAIAQIy5ldszuEkWAABYh4ACAACsQ0ABAADWCTmgHD58WHPmzJHX65XD4dDevXu79Hnvvfc0d+5cuVwuJScna8KECTp79mzgeFtbm5YvX65BgwZpwIABmjt3rs6dO/e5BgIAAPqOkAPKhQsXNGbMGJWVlXV7/Pe//70mTpyokSNH6tChQ3r77bf1zDPPqF+/foE++fn5qqys1K5du3TkyBG1trZq9uzZ6ujo6PlIAABAn+Ewxpgen+xwqLKyUl/72tcCbQsWLFBCQoJ27NjR7Tl+v19f+MIXtGPHDj322GOS/vK7Jq+//rpmzJhx07/b3Nwsl8slv9/PUzwAAMSIUD6/w3oPSmdnp/bt26cvfelLmjFjhgYPHqzx48cHfQ1UW1ury5cvKzc3N9Dm9XqVnZ2tmpqacJYDAABiVFgDSmNjo1pbW7V+/Xp95Stf0YEDB/T1r39d8+bNU3V1tSTJ5/MpMTFRAwcODDrX7XbL5/N1e922tjY1NzcHbQAAoO8K6w+1dXZ2SpIeffRRPfXUU5Kk++67TzU1NXrppZc0adKk655rjLnuD7eUlJRo3bp14SwVAABYLKwrKIMGDVJ8fLxGjRoV1H7PPfcEnuLxeDxqb29XU1NTUJ/Gxka53e5ur1tYWCi/3x/YGhoawlk2AACwTFgDSmJioh588EG9//77Qe2nT5/WsGHDJEnjxo1TQkKCqqqqAsfPnz+vkydPKicnp9vrOp3OwM/a8/P2AAD0fSF/xdPa2qoPP/wwsF9fX68TJ04oNTVVQ4cO1apVq/TYY4/pkUce0ZQpU/TGG2/oP//zP3Xo0CFJksvl0uLFi7Vy5UqlpaUpNTVVTz/9tEaPHq1p06aFbWAAACB2hfyY8aFDhzRlypQu7Xl5eSovL5ck/du//ZtKSkp07tw5jRgxQuvWrdOjjz4a6Pvpp59q1apVqqio0KVLlzR16lS9+OKLSk9Pv6UaeMwYAIDYE8rn9+f6HZRoIaAAABB7Qvn8DutTPABgu4yCfRG57sfrZ0XkusDtipcFAgAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrhBxQDh8+rDlz5sjr9crhcGjv3r3X7btkyRI5HA6VlpYGtbe1tWn58uUaNGiQBgwYoLlz5+rcuXOhlgIAAPqokAPKhQsXNGbMGJWVld2w3969e/XWW2/J6/V2
OZafn6/Kykrt2rVLR44cUWtrq2bPnq2Ojo5QywEAAH1QfKgnzJw5UzNnzrxhnz/84Q/6/ve/r/3792vWrFlBx/x+v7Zu3aodO3Zo2rRpkqSdO3cqPT1dBw8e1IwZM0ItCQAA9DFhvwels7NTixYt0qpVq3Tvvfd2OV5bW6vLly8rNzc30Ob1epWdna2amppur9nW1qbm5uagDQAA9F1hDygbNmxQfHy8fvCDH3R73OfzKTExUQMHDgxqd7vd8vl83Z5TUlIil8sV2NLT08NdNgAAsEhYA0ptba3+9V//VeXl5XI4HCGda4y57jmFhYXy+/2BraGhIRzlAgAAS4U1oPzqV79SY2Ojhg4dqvj4eMXHx+vMmTNauXKlMjIyJEkej0ft7e1qamoKOrexsVFut7vb6zqdTqWkpARtAACg7wprQFm0aJF+97vf6cSJE4HN6/Vq1apV2r9/vyRp3LhxSkhIUFVVVeC88+fP6+TJk8rJyQlnOQAAIEaF/BRPa2urPvzww8B+fX29Tpw4odTUVA0dOlRpaWlB/RMSEuTxeDRixAhJksvl0uLFi7Vy5UqlpaUpNTVVTz/9tEaPHh14qgcAANzeQg4ox44d05QpUwL7K1askCTl5eWpvLz8lq7x/PPPKz4+XvPnz9elS5c0depUlZeXKy4uLtRyAABAH+QwxphoFxGq5uZmuVwu+f1+7kcBEJKMgn0Rue7H62fdvBNwmwvl85t38QAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOvER7sAALhWRsG+aJcAIMpYQQEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArBNyQDl8+LDmzJkjr9crh8OhvXv3Bo5dvnxZa9as0ejRozVgwAB5vV793d/9nT755JOga7S1tWn58uUaNGiQBgwYoLlz5+rcuXOfezAAAKBvCDmgXLhwQWPGjFFZWVmXYxcvXtTx48f1zDPP6Pjx49qzZ49Onz6tuXPnBvXLz89XZWWldu3apSNHjqi1tVWzZ89WR0dHz0cCAAD6jJB/6n7mzJmaOXNmt8dcLpeqqqqC2l544QV9+ctf1tmzZzV06FD5/X5t3bpVO3bs0LRp0yRJO3fuVHp6ug4ePKgZM2b0YBgAAKAvifg9KH6/Xw6HQ3feeackqba2VpcvX1Zubm6gj9frVXZ2tmpqarq9Rltbm5qbm4M2AADQd0U0oHz66acqKCjQwoULlZKSIkny+XxKTEzUwIEDg/q63W75fL5ur1NSUiKXyxXY0tPTI1k2AACIsogFlMuXL2vBggXq7OzUiy++eNP+xhg5HI5ujxUWFsrv9we2hoaGcJcLAAAsEpGAcvnyZc2fP1/19fWqqqoKrJ5IksfjUXt7u5qamoLOaWxslNvt7vZ6TqdTKSkpQRsAAOi7wh5QroaTDz74QAcPHlRaWlrQ8XHjxikhISHoZtrz58/r5MmTysnJCXc5AAAgBoX8FE9ra6s+/PDDwH59fb1OnDih1NRUeb1efeMb39Dx48f12muvqaOjI3BfSWpqqhITE+VyubR48WKtXLlSaWlpSk1N1dNPP63Ro0cHnuoBAAC3t5ADyrFjxzRlypTA/ooVKyRJeXl5Kioq0quvvipJuu+++4LOe/PNNzV58mRJ0vPPP6/4+HjNnz9fly5d0tSpU1VeXq64uLgeDgMAAPQlDmOMiXYRoWpubpbL5ZLf7+d+FKAPyijYF+0SQvbx+lnRLgGwXiif37yLBwAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4AC
AACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOvER7sAALEro2BftEsA0EexggIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOiEHlMOHD2vOnDnyer1yOBzau3dv0HFjjIqKiuT1epWUlKTJkyfr1KlTQX3a2tq0fPlyDRo0SAMGDNDcuXN17ty5zzUQAADQd4QcUC5cuKAxY8aorKys2+MbN27Upk2bVFZWpqNHj8rj8Wj69OlqaWkJ9MnPz1dlZaV27dqlI0eOqLW1VbNnz1ZHR0fPRwIAAPqM+FBPmDlzpmbOnNntMWOMSktLtXbtWs2bN0+StH37drndblVUVGjJkiXy+/3aunWrduzYoWnTpkmSdu7cqfT0dB08eFAzZsz4HMMBAAB9QVjvQamvr5fP51Nubm6gzel0atKkSaqpqZEk1dbW6vLly0F9vF6vsrOzA32u1dbWpubm5qANAAD0XWENKD6fT5LkdruD2t1ud+CYz+dTYmKiBg4ceN0+1yopKZHL5Qps6enp4SwbAABYJiJP8TgcjqB9Y0yXtmvdqE9hYaH8fn9ga2hoCFutAADAPmENKB6PR5K6rIQ0NjYGVlU8Ho/a29vV1NR03T7XcjqdSklJCdoAAEDfFdaAkpmZKY/Ho6qqqkBbe3u7qqurlZOTI0kaN26cEhISgvqcP39eJ0+eDPQBAAC3t5Cf4mltbdWHH34Y2K+vr9eJEyeUmpqqoUOHKj8/X8XFxcrKylJWVpaKi4vVv39/LVy4UJLkcrm0ePFirVy5UmlpaUpNTdXTTz+t0aNHB57qAQAAt7eQA8qxY8c0ZcqUwP6KFSskSXl5eSovL9fq1at16dIlLV26VE1NTRo/frwOHDig5OTkwDnPP/+84uPjNX/+fF26dElTp05VeXm54uLiwjAkAOh9GQX7Inbtj9fPiti1AVs5jDEm2kWEqrm5WS6XS36/n/tRgCiK5Icy/oKAgr4ilM9v3sUDAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsE58tAsAEFkZBfuiXQIAhIwVFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADW4W3GgCV46zAA/AUrKAAAwDoEFAAAYB0CCgAAsA4BBQAAWCfsAeXKlSv64Q9/qMzMTCUlJWn48OF67rnn1NnZGehjjFFRUZG8Xq+SkpI0efJknTp1KtylAACAGBX2gLJhwwa99NJLKisr03vvvaeNGzfqn//5n/XCCy8E+mzcuFGbNm1SWVmZjh49Ko/Ho+nTp6ulpSXc5QAAgBgU9oDy61//Wo8++qhmzZqljIwMfeMb31Bubq6OHTsm6bPVk9LSUq1du1bz5s1Tdna2tm/frosXL6qioiLc5QAAgBgU9oAyceJE/fKXv9Tp06clSW+//baOHDmir371q5Kk+vp6+Xw+5ebmBs5xOp2aNGmSampqur1mW1ubmpubgzYAANB3hf2H2tasWSO/36+RI0cqLi5OHR0d+tGPfqTHH39ckuTz+SRJbrc76Dy3260zZ850e82SkhKtW7cu3KUCABAQyR9L/Hj9rIhdu68K+wrKK6+8op07d6qiokLHjx/X9u3b9eMf/1jbt28P6udwOIL2jTFd2q4qLCyU3+8PbA0NDeEuGwAAWCTsKyirVq1SQUGBFixYIEkaPXq0zpw5o5KSEuXl5cnj8Uj6bCVlyJAhgfMaGxu7rKpc5XQ65XQ6w10qAACwVNhXUC5evKg77gi+bFxc
XOAx48zMTHk8HlVVVQWOt7e3q7q6Wjk5OeEuBwAAxKCwr6DMmTNHP/rRjzR06FDde++9qqur06ZNm/TEE09I+uyrnfz8fBUXFysrK0tZWVkqLi5W//79tXDhwnCXAwAAYlDYA8oLL7ygZ555RkuXLlVjY6O8Xq+WLFmif/iHfwj0Wb16tS5duqSlS5eqqalJ48eP14EDB5ScnBzucgAAQAxyGGNMtIsIVXNzs1wul/x+v1JSUqJdDhAWkXyCALGNJ0B6B0/xRF4on9+8iwcAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdeKjXQAQS3jjMAD0DlZQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADr8ENtAHAbi9SPD368flZErovbBysoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALBORALKH/7wB33rW99SWlqa+vfvr/vuu0+1tbWB48YYFRUVyev1KikpSZMnT9apU6ciUQoAAIhBYQ8oTU1Nevjhh5WQkKBf/OIXevfdd/Uv//IvuvPOOwN9Nm7cqE2bNqmsrExHjx6Vx+PR9OnT1dLSEu5yAABADAr724w3bNig9PR0bdu2LdCWkZER+GdjjEpLS7V27VrNmzdPkrR9+3a53W5VVFRoyZIl4S4JAADEmLCvoLz66qt64IEH9Ld/+7caPHiwxo4dq5dffjlwvL6+Xj6fT7m5uYE2p9OpSZMmqaampttrtrW1qbm5OWgDAAB9V9gDykcffaTNmzcrKytL+/fv1/e+9z394Ac/0E9/+lNJks/nkyS53e6g89xud+DYtUpKSuRyuQJbenp6uMsGAAAWCXtA6ezs1P3336/i4mKNHTtWS5Ys0d///d9r8+bNQf0cDkfQvjGmS9tVhYWF8vv9ga2hoSHcZQMAAIuEPaAMGTJEo0aNCmq75557dPbsWUmSx+ORpC6rJY2NjV1WVa5yOp1KSUkJ2gAAQN8V9oDy8MMP6/333w9qO336tIYNGyZJyszMlMfjUVVVVeB4e3u7qqurlZOTE+5yAABADAr7UzxPPfWUcnJyVFxcrPnz5+u3v/2ttmzZoi1btkj67Kud/Px8FRcXKysrS1lZWSouLlb//v21cOHCcJcDADEvo2BftEsAel3YA8qDDz6oyspKFRYW6rnnnlNmZqZKS0v1zW9+M9Bn9erVunTpkpYuXaqmpiaNHz9eBw4cUHJycrjLAQAAMSjsAUWSZs+erdmzZ1/3uMPhUFFRkYqKiiLx5wEAQIzjXTwAAMA6BBQAAGCdiHzFAwC4vUXyxt6P18+K2LVhD1ZQAACAdQgoAADAOgQUAABgHe5BQZ/ED1sBQGxjBQUAAFiHFRTcVKRWI7gTHwBwPaygAAAA6xBQAACAdfiKp4/gplAAQF/CCgoAALAOAQUAAFiHgAIAAKzDPSgAAEQYP9cQOlZQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsw2PGiBp+nh8AcD2soAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdSIeUEpKSuRwOJSfnx9oM8aoqKhIXq9XSUlJmjx5sk6dOhXpUgAAQIyIaEA5evSotmzZor/+678Oat+4caM2bdqksrIyHT16VB6PR9OnT1dLS0skywEAADEiYgGltbVV3/zmN/Xyyy9r4MCBgXZjjEpLS7V27VrNmzdP2dnZ2r59uy5evKiKiopIlQMAAGJIxALKsmXLNGvWLE2bNi2ovb6+Xj6fT7m5uYE2p9OpSZMmqaamJlLlAACAGBIfiYvu2rVLx48f19GjR7sc8/l8kiS32x3U
7na7debMmW6v19bWpra2tsB+c3NzGKsFAAC2CfsKSkNDg5588knt3LlT/fr1u24/h8MRtG+M6dJ2VUlJiVwuV2BLT08Pa80AAMAuYQ8otbW1amxs1Lhx4xQfH6/4+HhVV1frJz/5ieLj4wMrJ1dXUq5qbGzssqpyVWFhofx+f2BraGgId9kAAMAiYf+KZ+rUqXrnnXeC2r7zne9o5MiRWrNmjYYPHy6Px6OqqiqNHTtWktTe3q7q6mpt2LCh22s6nU45nc5wlwoAACwV9oCSnJys7OzsoLYBAwYoLS0t0J6fn6/i4mJlZWUpKytLxcXF6t+/vxYuXBjucgAAQAyKyE2yN7N69WpdunRJS5cuVVNTk8aPH68DBw4oOTk5GuUAAADL9EpAOXToUNC+w+FQUVGRioqKeuPPAwCAGMO7eAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWCcqjxkDANBTGQX7ol0CegErKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA68dEu4HaSUbAv2iUAABATWEEBAADWIaAAAADrEFAAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOuEPaCUlJTowQcfVHJysgYPHqyvfe1rev/994P6GGNUVFQkr9erpKQkTZ48WadOnQp3KQAAIEaFPaBUV1dr2bJl+s1vfqOqqipduXJFubm5unDhQqDPxo0btWnTJpWVleno0aPyeDyaPn26Wlpawl0OAACIQWF/WeAbb7wRtL9t2zYNHjxYtbW1euSRR2SMUWlpqdauXat58+ZJkrZv3y63262KigotWbIk3CUBAIAYE/F7UPx+vyQpNTVVklRfXy+fz6fc3NxAH6fTqUmTJqmmpqbba7S1tam5uTloAwAAfVdEA4oxRitWrNDEiROVnZ0tSfL5fJIkt9sd1NftdgeOXaukpEQulyuwpaenR7JsAAAQZRENKN///vf1u9/9Tj/72c+6HHM4HEH7xpgubVcVFhbK7/cHtoaGhojUCwAA7BD2e1CuWr58uV599VUdPnxYd911V6Dd4/FI+mwlZciQIYH2xsbGLqsqVzmdTjmdzkiVCgAALBP2gGKM0fLly1VZWalDhw4pMzMz6HhmZqY8Ho+qqqo0duxYSVJ7e7uqq6u1YcOGcJfTIxkF+6JdAgAAt7WwB5Rly5apoqJC//Ef/6Hk5OTAfSUul0tJSUlyOBzKz89XcXGxsrKylJWVpeLiYvXv318LFy4MdzkAACAGhT2gbN68WZI0efLkoPZt27bp29/+tiRp9erVunTpkpYuXaqmpiaNHz9eBw4cUHJycrjLAQAAMSgiX/HcjMPhUFFRkYqKisL95wEAQB/Au3gAAIB1CCgAAMA6BBQAAGAdAgoAALAOAQUAAFiHgAIAAKxDQAEAANYhoAAAAOsQUAAAgHUIKAAAwDoEFAAAYB0CCgAAsA4BBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB14qNdAAAA6JmMgn0Ru/bH62dF7Nq3ghUUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADWIaAAAADrEFAAAIB1CCgAAMA6UQ0oL774ojIzM9WvXz+NGzdOv/rVr6JZDgAAsETUAsorr7yi/Px8rV27VnV1dfqbv/kbzZw5U2fPno1WSQAAwBJRCyibNm3S4sWL9d3vflf33HOPSktLlZ6ers2bN0erJAAAYImovIunvb1dtbW1KigoCGrPzc1VTU1Nl/5tbW1qa2sL7Pv9fklSc3NzROrrbLsYkesCABArIvEZe/Waxpib9o1KQPnjH/+ojo4O
ud3uoHa32y2fz9elf0lJidatW9elPT09PWI1AgBwO3OVRu7aLS0tcrlcN+wT1bcZOxyOoH1jTJc2SSosLNSKFSsC+52dnfqf//kfpaWldds/VM3NzUpPT1dDQ4NSUlI+9/Vsw/hiG+OLXX15bBLji3XRGJ8xRi0tLfJ6vTftG5WAMmjQIMXFxXVZLWlsbOyyqiJJTqdTTqczqO3OO+8Me10pKSl98l/CqxhfbGN8sasvj01ifLGut8d3s5WTq6Jyk2xiYqLGjRunqqqqoPaqqirl5OREoyQAAGCRqH3Fs2LFCi1atEgPPPCAHnroIW3ZskVnz57V9773vWiVBAAALBG1gPLYY4/pT3/6k5577jmdP39e2dnZev311zVs2LBer8XpdOrZZ5/t8jVSX8H4Yhvji119eWwS44t1to/PYW7lWR8AAIBexLt4AACAdQgoAADAOgQUAABgHQIKAACwTp8PKBkZGXI4HF22ZcuWXfec6upqjRs3Tv369dPw4cP10ksv9WLFoQl1fIcOHeq2/3//93/3cuW35sqVK/rhD3+ozMxMJSUlafjw4XruuefU2dl5w/NiZQ57Mr5YmsOWlhbl5+dr2LBhSkpKUk5Ojo4ePXrDc2Jl7qTQx2fz3B0+fFhz5syR1+uVw+HQ3r17g44bY1RUVCSv16ukpCRNnjxZp06duul1d+/erVGjRsnpdGrUqFGqrKyM0AhuLBLjKy8v73Y+P/300wiOpHs3G9+ePXs0Y8YMDRo0SA6HQydOnLil60Z1/kwf19jYaM6fPx/YqqqqjCTz5ptvdtv/o48+Mv379zdPPvmkeffdd83LL79sEhISzL//+7/3buG3KNTxvfnmm0aSef/994POu3LlSu8Wfov+6Z/+yaSlpZnXXnvN1NfXm5///Ofmr/7qr0xpael1z4mlOezJ+GJpDufPn29GjRplqqurzQcffGCeffZZk5KSYs6dO9dt/1iaO2NCH5/Nc/f666+btWvXmt27dxtJprKyMuj4+vXrTXJystm9e7d55513zGOPPWaGDBlimpubr3vNmpoaExcXZ4qLi817771niouLTXx8vPnNb34T4dF0FYnxbdu2zaSkpATN5fnz5yM8ku7dbHw//elPzbp168zLL79sJJm6urqbXjPa89fnA8q1nnzySXP33Xebzs7Obo+vXr3ajBw5MqhtyZIlZsKECb1R3ud2s/Fd/Q9kU1NT7xbWQ7NmzTJPPPFEUNu8efPMt771reueE0tz2JPxxcocXrx40cTFxZnXXnstqH3MmDFm7dq13Z4TS3PXk/HFytxd+wHX2dlpPB6PWb9+faDt008/NS6Xy7z00kvXvc78+fPNV77ylaC2GTNmmAULFoS95lCEa3zbtm0zLpcrgpX2THcB5ar6+vpbDijRnr8+/xXP/9fe3q6dO3fqiSeeuO5LBn/9618rNzc3qG3GjBk6duyYLl++3Btl9titjO+qsWPHasiQIZo6darefPPNXqowdBMnTtQvf/lLnT59WpL09ttv68iRI/rqV7963XNiaQ57Mr6rbJ/DK1euqKOjQ/369QtqT0pK0pEjR7o9J5bmrifju8r2ubtWfX29fD5f0Nw4nU5NmjRJNTU11z3vevN5o3Oioafjk6TW1lYNGzZMd911l2bPnq26urpIl9troj1/t1VA2bt3r/785z/r29/+9nX7+Hy+Li8sdLvdunLliv74xz9GuMLP51bGN2TIEG3ZskW7d+/Wnj17NGLECE2dOlWHDx/uvUJDsGbNGj3++OMaOXKkEhISNHbsWOXn5+vxxx+/7jmxNIc9GV+szGFycrIeeugh/eM//qM++eQTdXR0aOfOnXrrrbd0/vz5bs+JpbnryfhiZe6udfXFrt3NzbUvfb32vFDPiYaejm/kyJEqLy/Xq6++qp/97Gfq16+fHn74YX3wwQcRrbe3RHv+ovZT99GwdetWzZw586aveb529cH834/t3mxVItpuZXwjRozQ
iBEjAvsPPfSQGhoa9OMf/1iPPPJIb5QZkldeeUU7d+5URUWF7r33Xp04cUL5+fnyer3Ky8u77nmxMoc9GV8szeGOHTv0xBNP6Itf/KLi4uJ0//33a+HChTp+/Ph1z4mVuZNCH18szV13upubm81LT86JllBrnTBhgiZMmBDYf/jhh3X//ffrhRde0E9+8pOI1dmbojl/t80KypkzZ3Tw4EF997vfvWE/j8fTJR02NjYqPj5eaWlpkSzxc7nV8XVnwoQJ1ib+VatWqaCgQAsWLNDo0aO1aNEiPfXUUyopKbnuObE0hz0ZX3dsncO7775b1dXVam1tVUNDg37729/q8uXLyszM7LZ/LM2dFPr4umPr3P1/Ho9Hkrqdm2v/D/va80I9Jxp6Or5r3XHHHXrwwQetn89bFe35u20CyrZt2zR48GDNmjXrhv0eeughVVVVBbUdOHBADzzwgBISEiJZ4udyq+PrTl1dnYYMGRKBqj6/ixcv6o47gv81jYuLu+FjuLE0hz0ZX3dsnkNJGjBggIYMGaKmpibt379fjz76aLf9Ymnu/r9bHV93bJ87ScrMzJTH4wmam/b2dlVXVysnJ+e6511vPm90TjT0dHzXMsboxIkT1s/nrYr6/PXKrbhR1tHRYYYOHWrWrFnT5VhBQYFZtGhRYP/qY45PPfWUeffdd83WrVutfszRmNDG9/zzz5vKykpz+vRpc/LkSVNQUGAkmd27d/dmybcsLy/PfPGLXww8hrtnzx4zaNAgs3r16kCfWJ7DnowvlubwjTfeML/4xS/MRx99ZA4cOGDGjBljvvzlL5v29nZjTGzPnTGhj8/muWtpaTF1dXWmrq7OSDKbNm0ydXV15syZM8aYzx7DdblcZs+ePeadd94xjz/+eJfHcBctWmQKCgoC+//1X/9l4uLizPr16817771n1q9fH7XHjCMxvqKiIvPGG2+Y3//+96aurs585zvfMfHx8eatt96ybnx/+tOfTF1dndm3b5+RZHbt2mXq6uqCHou2bf5ui4Cyf//+wG8PXCsvL89MmjQpqO3QoUNm7NixJjEx0WRkZJjNmzf3UqU9E8r4NmzYYO6++27Tr18/M3DgQDNx4kSzb9++Xqw2NM3NzebJJ580Q4cONf369TPDhw83a9euNW1tbYE+sTyHPRlfLM3hK6+8YoYPH24SExONx+Mxy5YtM3/+858Dx2N57owJfXw2z93VR6Cv3fLy8owxnz2K++yzzxqPx2OcTqd55JFHzDvvvBN0jUmTJgX6X/Xzn//cjBgxwiQkJJiRI0dGLYxFYnz5+flm6NChJjEx0XzhC18wubm5pqamphdH9Rc3G9+2bdu6Pf7ss88GrmHb/DmM+b870AAAACxx29yDAgAAYgcBBQAAWIeAAgAArENAAQAA1iGgAAAA6xBQAACAdQgoAADAOgQUAABgHQIKAACwDgEFAABYh4ACAACsQ0ABAADW+V+h2/yeFSpd3wAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.hist(data['charges']) # 画y的柱状图\n",
    "plt.show()\n",
    "# 线性回归时，要求数据y服从正态分布，才能有损失MSE\n",
    "# 原始数据数学上叫右偏，需要调整成正态分布\n",
    "print('调整后数据')\n",
    "plt.hist(np.log(data['charges']),bins=20) # 画y的柱状图  bins控制图粗细\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "68450a62-41af-44cf-88f6-c7f064967e78",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "id": "da828e75-fa67-47c5-be5c-55dec2ccca44",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 步骤3：特征工程"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "id": "5dcb6a55-d4a2-4937-8708-ea64957f23c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 特征x有一些非数值的维度，比如sex  smoker，该如何量化呢\n",
    "# 可以采用One-Hot独热编码，可由多种工具实现\n",
    "# 将非数值特征按照类似直接编码的原则进行编码\n",
    "# 比如西南方向编码为1 0 0 0  因为有4个方向，东南可以为 0 1 0 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "id": "14d49b8d-7e7c-4bb7-a911-f9410aab544b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>bmi</th>\n",
       "      <th>children</th>\n",
       "      <th>charges</th>\n",
       "      <th>sex_female</th>\n",
       "      <th>sex_male</th>\n",
       "      <th>smoker_no</th>\n",
       "      <th>smoker_yes</th>\n",
       "      <th>region_northeast</th>\n",
       "      <th>region_northwest</th>\n",
       "      <th>region_southeast</th>\n",
       "      <th>region_southwest</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19</td>\n",
       "      <td>27.900</td>\n",
       "      <td>0</td>\n",
       "      <td>16884.92400</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>18</td>\n",
       "      <td>33.770</td>\n",
       "      <td>1</td>\n",
       "      <td>1725.55230</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>28</td>\n",
       "      <td>33.000</td>\n",
       "      <td>3</td>\n",
       "      <td>4449.46200</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33</td>\n",
       "      <td>22.705</td>\n",
       "      <td>0</td>\n",
       "      <td>21984.47061</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>32</td>\n",
       "      <td>28.880</td>\n",
       "      <td>0</td>\n",
       "      <td>3866.85520</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   age     bmi  children      charges  sex_female  sex_male  smoker_no  \\\n",
       "0   19  27.900         0  16884.92400        True     False      False   \n",
       "1   18  33.770         1   1725.55230       False      True       True   \n",
       "2   28  33.000         3   4449.46200       False      True       True   \n",
       "3   33  22.705         0  21984.47061       False      True       True   \n",
       "4   32  28.880         0   3866.85520       False      True       True   \n",
       "\n",
       "   smoker_yes  region_northeast  region_northwest  region_southeast  \\\n",
       "0        True             False             False             False   \n",
       "1       False             False             False              True   \n",
       "2       False             False             False              True   \n",
       "3       False             False              True             False   \n",
       "4       False             False              True             False   \n",
       "\n",
       "   region_southwest  \n",
       "0              True  \n",
       "1             False  \n",
       "2             False  \n",
       "3             False  \n",
       "4             False  "
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One-hot encode the non-numeric columns (sex, smoker, region); numeric columns pass through unchanged.\n",
    "# NOTE(review): every category level gets its own column, so each dummy group is perfectly\n",
    "# collinear (e.g. sex_female + sex_male == 1). Consider drop_first=True for the unregularised\n",
    "# linear model - confirm against the modelling intent before changing.\n",
    "data=pd.get_dummies(data)\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "3a5d0e4e-8ddc-4f26-bf52-2d6f8316484c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 从数据集中分离出x和y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "id": "5f88d386-4ffc-416d-bb71-51ab4e1fe57d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>bmi</th>\n",
       "      <th>children</th>\n",
       "      <th>sex_female</th>\n",
       "      <th>sex_male</th>\n",
       "      <th>smoker_no</th>\n",
       "      <th>smoker_yes</th>\n",
       "      <th>region_northeast</th>\n",
       "      <th>region_northwest</th>\n",
       "      <th>region_southeast</th>\n",
       "      <th>region_southwest</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19</td>\n",
       "      <td>27.900</td>\n",
       "      <td>0</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>18</td>\n",
       "      <td>33.770</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>28</td>\n",
       "      <td>33.000</td>\n",
       "      <td>3</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33</td>\n",
       "      <td>22.705</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>32</td>\n",
       "      <td>28.880</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   age     bmi  children  sex_female  sex_male  smoker_no  smoker_yes  \\\n",
       "0   19  27.900         0        True     False      False        True   \n",
       "1   18  33.770         1       False      True       True       False   \n",
       "2   28  33.000         3       False      True       True       False   \n",
       "3   33  22.705         0       False      True       True       False   \n",
       "4   32  28.880         0       False      True       True       False   \n",
       "\n",
       "   region_northeast  region_northwest  region_southeast  region_southwest  \n",
       "0             False             False             False              True  \n",
       "1             False             False              True             False  \n",
       "2             False             False              True             False  \n",
       "3             False              True             False             False  \n",
       "4             False              True             False             False  "
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Separate the regression target from the feature matrix.\n",
    "y = data['charges']  # target column\n",
    "x = data.drop(columns=['charges'])  # features: every remaining column\n",
    "x.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "id": "2bcb3cbe-c5f5-4982-9747-3a01331c50da",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace any missing values with 0. Reassign instead of using inplace=True:\n",
    "# y was selected from data, so an in-place fill can write through to data on\n",
    "# pandas versions without copy-on-write, and reassignment keeps the cell re-runnable.\n",
    "x = x.fillna(0)\n",
    "y = y.fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "id": "90f2891d-e957-493e-b1f0-256bcd9dac5b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "# Hold out 30% of the rows for testing. A fixed random_state makes the split, and\n",
    "# therefore every metric computed from it, reproducible across kernel restarts.\n",
    "x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "id": "9614de26-860a-4240-a71d-b262a3e804c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler\n",
    "# Learn the per-column mean and standard deviation from the training rows only.\n",
    "scaler = StandardScaler(with_mean=True, with_std=True)\n",
    "scaler = scaler.fit(x_train)  # fit() returns the fitted estimator itself"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "id": "6d245444-9ccb-4c90-b7d5-948d74f19ddb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-1.09021406, -0.89589774, -0.91138738, ..., -0.56254395,\n",
       "        -0.60368161, -0.58228498],\n",
       "       [-0.8768221 , -0.07234961,  1.57324928, ..., -0.56254395,\n",
       "        -0.60368161,  1.71737215],\n",
       "       [ 0.33239902, -1.4091827 ,  0.74503706, ..., -0.56254395,\n",
       "        -0.60368161, -0.58228498],\n",
       "       ...,\n",
       "       [ 1.75501209, -1.26919589, -0.91138738, ..., -0.56254395,\n",
       "         1.65650234, -0.58228498],\n",
       "       [ 1.39935882,  0.09956402, -0.91138738, ...,  1.77763888,\n",
       "        -0.60368161, -0.58228498],\n",
       "       [-0.30777687, -1.19142544,  0.74503706, ..., -0.56254395,\n",
       "        -0.60368161, -0.58228498]], shape=(402, 11))"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Standardise both splits with the statistics learned from the training set,\n",
    "# so the test rows never influence the scaling.\n",
    "x_train_scaled, x_test_scaled = scaler.transform(x_train), scaler.transform(x_test)\n",
    "x_test_scaled"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "id": "15f29c2a-f36d-4e52-a960-78e4e210e8c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import PolynomialFeatures\n",
    "# Expand the standardised features with all degree-2 terms (squares and pairwise products).\n",
    "poly_features=PolynomialFeatures(degree=2,include_bias=False)\n",
    "# Start again from scaler.transform(...) so that re-running this cell does not expand\n",
    "# already-expanded features; fit on the training split and only transform the test split.\n",
    "x_train_scaled=poly_features.fit_transform(scaler.transform(x_train))\n",
    "x_test_scaled=poly_features.transform(scaler.transform(x_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae1f58a6-009b-415e-804c-05cbd08e5230",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "id": "b0e79762-b3b4-47e6-baef-b23b3a9f2cf0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 步骤4：模型训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "id": "c4a519f2-7c1f-4edc-8f53-40636895a8f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 线性回归"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "id": "5cfc885f-9901-43b1-9cea-5ba9c7521ee5",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "# Ordinary least squares fitted on the log1p-transformed target.\n",
    "reg = LinearRegression().fit(x_train_scaled, np.log1p(y_train))\n",
    "y_predict_linear = reg.predict(x_test_scaled)  # test-set predictions, still on the log1p scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "id": "a4bb4a1b-ee78-4ed6-8c40-2fd2ed842980",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ridge回归"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "id": "f8fd8cda-90ac-478b-aadb-36e90d51dd19",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import Ridge\n",
    "# L2-regularised linear regression (alpha=0.4) fitted on the log1p-transformed target.\n",
    "ridge = Ridge(alpha=0.4).fit(x_train_scaled, np.log1p(y_train))\n",
    "y_predict_ridge = ridge.predict(x_test_scaled)  # test-set predictions, still on the log1p scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "id": "e58b22ba-f1d1-4985-9ef4-c06fd26a2fd9",
   "metadata": {},
   "outputs": [],
   "source": [
    "#  GradientBoostingRegressor梯度提升回归"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "id": "b4b9d382-a766-4bbb-9508-b5c8190a3ada",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import GradientBoostingRegressor\n",
    "# Gradient-boosted regression trees fitted on the log1p-transformed target.\n",
    "booster = GradientBoostingRegressor()\n",
    "booster.fit(x_train_scaled, np.log1p(y_train))\n",
    "# Predict with booster (not ridge) so that the boosting test metrics describe this model.\n",
    "y_predict_boost = booster.predict(x_test_scaled)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "12270f06-6f5f-452a-9bb4-066c5aeccfee",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "id": "161e70ca-2930-4297-9645-e07145f8c2c9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 步骤5：模型评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "id": "2277fd74-c547-4eb3-af5a-70fdde970d38",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "id": "6393c58a-c136-4d30-87f6-af4fc8ae76bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 线性回归评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "id": "dd4155f2-76cc-45f4-a9a9-aabdfc00dc47",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(np.float64(0.36587631198890436),\n",
       " np.float64(1.2539180746113854),\n",
       " np.float64(4978.600575434705),\n",
       " np.float64(5043.77788016613))"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# RMSE on the log1p scale, i.e. the scale the model was trained on.\n",
    "log_rmse_train=np.sqrt(mean_squared_error(y_true=np.log1p(y_train),y_pred=reg.predict(x_train_scaled)))\n",
    "log_rmse_test=np.sqrt(mean_squared_error(y_true=np.log1p(y_test),y_pred=y_predict_linear))\n",
    "# RMSE in the original units of charges. np.expm1 is the exact inverse of the np.log1p\n",
    "# applied to the target; np.exp would leave every prediction too large by 1.\n",
    "rmse_train=np.sqrt(mean_squared_error(y_true=y_train,y_pred=np.expm1(reg.predict(x_train_scaled))))\n",
    "rmse_test=np.sqrt(mean_squared_error(y_true=y_test,y_pred=np.expm1(reg.predict(x_test_scaled))))\n",
    "log_rmse_train,log_rmse_test,rmse_train,rmse_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "id": "008adbcf-0fd2-4fc4-b014-66f8f35ddfc2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ridge回归评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "id": "f82e4c52-a9fd-4bf1-a268-c6417c7e731d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(np.float64(0.36587638444697484),\n",
       " np.float64(0.36541778043788653),\n",
       " np.float64(4978.404251842419),\n",
       " np.float64(5043.947511171133))"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# RMSE on the log1p scale, i.e. the scale the model was trained on.\n",
    "log_rmse_train = np.sqrt(mean_squared_error(y_true=np.log1p(y_train), y_pred=ridge.predict(x_train_scaled)))\n",
    "log_rmse_test = np.sqrt(mean_squared_error(y_true=np.log1p(y_test), y_pred=y_predict_ridge))\n",
    "# RMSE in the original units of charges. np.expm1 is the exact inverse of the np.log1p\n",
    "# applied to the target; np.exp would leave every prediction too large by 1.\n",
    "rmse_train = np.sqrt(mean_squared_error(y_true=y_train, y_pred=np.expm1(ridge.predict(x_train_scaled))))\n",
    "rmse_test = np.sqrt(mean_squared_error(y_true=y_test, y_pred=np.expm1(ridge.predict(x_test_scaled))))\n",
    "log_rmse_train, log_rmse_test, rmse_train, rmse_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "id": "0d51e8d3-fd36-4f65-8234-bbbcfe85899b",
   "metadata": {},
   "outputs": [],
   "source": [
    "#  GradientBoostingRegressor梯度提升回归评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "id": "44193d9e-397e-4173-b152-ea9663273b59",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(np.float64(0.26197558771323015),\n",
       " np.float64(0.36541778043788653),\n",
       " np.float64(3631.181921832026),\n",
       " np.float64(4740.4634659264))"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# RMSE on the log1p scale, i.e. the scale the model was trained on.\n",
    "log_rmse_train = np.sqrt(mean_squared_error(y_true=np.log1p(y_train), y_pred=booster.predict(x_train_scaled)))\n",
    "# Score the booster's own test predictions so this figure cannot pick up another model's output.\n",
    "log_rmse_test = np.sqrt(mean_squared_error(y_true=np.log1p(y_test), y_pred=booster.predict(x_test_scaled)))\n",
    "# RMSE in the original units of charges. np.expm1 is the exact inverse of the np.log1p\n",
    "# applied to the target; np.exp would leave every prediction too large by 1.\n",
    "rmse_train = np.sqrt(mean_squared_error(y_true=y_train, y_pred=np.expm1(booster.predict(x_train_scaled))))\n",
    "rmse_test = np.sqrt(mean_squared_error(y_true=y_test, y_pred=np.expm1(booster.predict(x_test_scaled))))\n",
    "log_rmse_train, log_rmse_test, rmse_train, rmse_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "id": "c4aac864-40a5-4ec0-a89b-6a336ce1d55a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 评估结果\n",
    "# 普通线性回归，Ridge回归误差都在5000左右，对比数据而言，5000元的误差是很糟糕的\n",
    "# 梯度提升算法相对有所改善，但还是效果不佳\n",
    "# 优化办法  在数据的特征工程和训练算法方向下手"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b3178d8-616e-4359-9e6d-6549317cc217",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
